Packages

pacman::p_load(
  dplyr,
  tidyverse,
  readxl,
  ggplot2,
  stringr,
  viridis,
  rstatix,
  PoweR,
  ggpubr)

# setwd("D:/PhD/01_Data/02_Probiotics/Model_2") # Windows
# setwd("/Volumes/Yiming_Wang/PhD/01_Data/02_Probiotics/Model_2") # Mac
# setwd("/Users/godric/Desktop/Fut2 review comments/Revision") #Macbook pro
setwd("/Users/godric/Documents/GitHub/2024_ISME_Fut2_Bifidobacterium/01_Data")
df_all_model2 <- read_excel("Model_2_probiotic.xlsx") 

Abx+Infantis

df_detection_limit_1<-df_all_model2 %>%
  filter(Group=="ABX_PRO" & Treatment== "Infantis") %>% 
  filter(Timepoint == "Last_abx_day") %>%  
  mutate(Copies_DNA_log = log10(Copies_DNA+1)) %>%
  select(Mice_ID,Copies_DNA_log)

df_shape_1 <- df_all_model2 %>% 
  filter(Group=="ABX_PRO" & Treatment== "Infantis") %>% 
  filter(Timepoint != "Last_abx_day") %>%
  mutate(Copies_DNA_log = log10(Copies_DNA+1)) %>% 
  mutate(Day = as.numeric(gsub("D","",gsub("_post_gavage","",Timepoint)))) %>%
  separate(GenotypeSex, c("Genotype","Sex"), sep = "_",remove = FALSE) %>% 
  mutate(Genotype=factor(Genotype,levels = c("WT","KO"))) %>% 
  mutate(Sex = factor(Sex, levels = c("Male", "Female"))) %>% 
  mutate(GenotypeSex = as.factor(gsub("_"," ",GenotypeSex))) %>% 
  mutate(GenotypeSex = factor(GenotypeSex,levels = c("WT Male","WT Female","KO Male","KO Female"))) %>% 
  select(Day,Genotype,Sex,GenotypeSex,Mice_ID,Copies_DNA_log) %>%
  inner_join(df_detection_limit_1,by="Mice_ID") %>%
  rename(Copies_DNA_log = Copies_DNA_log.x,
         Copies_DNA_log_dl = Copies_DNA_log.y) %>%
  arrange (Mice_ID, Day) %>%
  group_by (Mice_ID) %>%
  mutate(Day_difference = Day - lag(Day)) %>%
  mutate(l_difference = Copies_DNA_log - Copies_DNA_log_dl) %>%
  mutate(area = (l_difference + lag(l_difference)) * Day_difference/2)

# Area and line data
## All genotype x sex type
area_1 <- df_shape_1 %>% 
  select(Mice_ID,Genotype,Sex,GenotypeSex,area) %>% 
  group_by(Mice_ID,Genotype,Sex, GenotypeSex) %>% 
  summarise(total_area=sum(area,na.rm=T))

line_data_1<-df_shape_1 %>%
  group_by(Day,GenotypeSex) %>%
  summarise(Copies_DNA_log_mean=mean(Copies_DNA_log),
            Copies_DNA_log_sd=sd(Copies_DNA_log))

## Male (WT vs KO)
area_2 <- area_1 %>% 
  filter(Sex=="Male")

line_data_2<-line_data_1 %>% 
  filter(GenotypeSex == "WT Male" | GenotypeSex == "KO Male")

# Figure
## Male (WT vs KO)  
figure_2 <- ggplot(data=line_data_2, aes(x=Day,y=Copies_DNA_log_mean,group=GenotypeSex))+
  geom_line(size=1,aes(color=GenotypeSex)) +
  geom_point(size=2,aes(color=GenotypeSex,shape=GenotypeSex))+
  geom_pointrange(aes(ymin=Copies_DNA_log_mean-Copies_DNA_log_sd, ymax=Copies_DNA_log_mean+Copies_DNA_log_sd,color=GenotypeSex)) +
  theme_bw() +
  labs(x="Time (Days post-gavage)",y=expression(atop("B. infantis abundance",paste (log[10]*"copies/ng faces DNA"),caption = ""))) + 
  theme(legend.position = "bottom", # right,left, bottom
        legend.title = element_text(face="plain", size = 14), 
        legend.text = element_text(face="plain", size = 14),
        axis.text.x=element_text(colour="black", face="plain", size=17), 
        axis.text.y=element_text(colour="black", face="plain", size=17),
        axis.title.x=element_text(margin = margin(t = 5), size=17),
        axis.title.y=element_text(margin = margin(r = 5), size=17),
        axis.title = element_text(face="plain", size = 10),
        plot.title = element_text(size=10, hjust=0.5, face="plain"),
        panel.grid = element_blank(),
        panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
        legend.background = element_rect(color="transparent"),
        aspect.ratio = 1,
        plot.background = element_rect(fill="transparent", colour = "transparent"))+
  scale_x_continuous(limits = c(0,14),breaks = seq(2,14,2))+
  scale_y_continuous(limits = c(0,7),breaks = seq(0,7,1))+
  scale_colour_manual(values=c("#3C5488B2","#7E6148B2")) + 
  geom_hline(yintercept = median(df_shape_1$Copies_DNA_log_dl),linetype="dashed",color=c("#E7A79BFF"))+
  annotate("text",x=7,y=7,
           label = expression(italic("P") == "0.028"),hjust=0.5,size=6)

  figure_2 

# Stats
area_WTMale <- area_1 %>% 
  filter(GenotypeSex == "WT Male")

area_KOMale <- area_1 %>% 
  filter(GenotypeSex == "KO Male")



## D'Agostino-Pearson normality test ("omnibus K2" test)
  ##Null (H0): The data are normally distributed Alternate/the data are sampled from a Gaussian distribution
  ##The low p-value (<0.05) indicates you reject that null hypothesis and so accept the alternative hypothesis that the data are not sampled from a Gaussian population
  ## https://www.rdocumentation.org/packages/PoweR/versions/1.0.7/topics/statcompute
  ## in this case, all notmally distributed, therefore one-way anova is chosen
statcompute(6,area_WTMale$`total_area`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 0.1289598
## 
## $pvalue
## [1] 0.9375549
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
statcompute(6,area_KOMale$`total_area`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 3.296396
## 
## $pvalue
## [1] 0.1923963
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
set.seed(123)

## Summary
Summary_area <- area_1 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(total_area, na.rm=TRUE),
    sd=sd(total_area, na.rm=TRUE),
    median= median (total_area, na.rm=TRUE),
    IQR = IQR (total_area,na.rm=TRUE),
    IQR25 = quantile(total_area, 0.25),
    IQR75 = quantile(total_area, 0.75)
  )
Summary_area
## # A tibble: 4 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         8  31.1  4.11   31.1  3.96  29.0  32.9
## 2 WT Female       8  30.4  4.72   31.7  4.00  28.9  32.9
## 3 KO Male         8  26.2  5.15   25.5  9.32  21.8  31.1
## 4 KO Female       8  28.7  1.57   29.4  2.45  27.5  30.0
## stats
### Male (WT vs KO): # we hypothesized mean abundance of glycan utilizer B. infantis in WT is greater than mean abundance of that in KO: one-sided (greater)
stats <- t.test(total_area ~ GenotypeSex, data=area_2, alternative="greater",
                     paired=FALSE, correct=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
stats
## 
##  Welch Two Sample t-test
## 
## data:  total_area by GenotypeSex
## t = 2.1027, df = 13.345, p-value = 0.0275
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  0.7809354       Inf
## sample estimates:
## mean in group WT Male mean in group KO Male 
##              31.05123              26.15388
# Effect size
set.seed(123)
effectsize <- area_1 %>% 
  as.data.frame()%>%
  wilcox_effsize(total_area ~ GenotypeSex)
effectsize
## # A tibble: 6 × 7
##   .y.        group1    group2    effsize    n1    n2 magnitude
## * <chr>      <chr>     <chr>       <dbl> <int> <int> <ord>    
## 1 total_area WT Male   WT Female  0.0525     8     8 small    
## 2 total_area WT Male   KO Male    0.446      8     8 moderate 
## 3 total_area WT Male   KO Female  0.289      8     8 small    
## 4 total_area WT Female KO Male    0.394      8     8 moderate 
## 5 total_area WT Female KO Female  0.341      8     8 moderate 
## 6 total_area KO Male   KO Female  0.158      8     8 small

Abx+bifidum

df_detection_limit_1<-df_all_model2 %>%
  filter(Group=="ABX_PRO" & Treatment== "Bifidum") %>% 
  filter(Timepoint == "Last_abx_day") %>%  
  mutate(Copies_DNA_log = log10(Copies_DNA+1)) %>%
  select(Mice_ID,Copies_DNA_log)

df_shape_1 <- df_all_model2 %>% 
  filter(Group=="ABX_PRO" & Treatment== "Bifidum") %>% 
  filter(Timepoint != "Last_abx_day") %>%
  mutate(Copies_DNA_log = log10(Copies_DNA+1)) %>% 
  mutate(Day = as.numeric(gsub("D","",gsub("_post_gavage","",Timepoint)))) %>%
  separate(GenotypeSex, c("Genotype","Sex"), sep = "_",remove = FALSE) %>% 
  mutate(Genotype=factor(Genotype,levels = c("WT","KO"))) %>% 
  mutate(Sex = factor(Sex, levels = c("Male", "Female"))) %>% 
  mutate(GenotypeSex = as.factor(gsub("_"," ",GenotypeSex))) %>% 
  mutate(GenotypeSex = factor(GenotypeSex,levels = c("WT Male","WT Female","KO Male","KO Female"))) %>% 
  select(Day,Genotype,Sex,GenotypeSex,Mice_ID,Copies_DNA_log) %>%
  inner_join(df_detection_limit_1,by="Mice_ID") %>%
  rename(Copies_DNA_log = Copies_DNA_log.x,
         Copies_DNA_log_dl = Copies_DNA_log.y) %>%
  arrange (Mice_ID, Day) %>%
  group_by (Mice_ID) %>%
  mutate(Day_difference = Day - lag(Day)) %>%
  mutate(l_difference = Copies_DNA_log - Copies_DNA_log_dl) %>%
  mutate(area = (l_difference + lag(l_difference)) * Day_difference/2)

# Area and line data
## All genotype x sex type
area_1 <- df_shape_1 %>% 
  select(Mice_ID,Genotype,Sex,GenotypeSex,area) %>% 
  group_by(Mice_ID,Genotype,Sex, GenotypeSex) %>% 
  summarise(total_area=sum(area,na.rm=T))

line_data_1<-df_shape_1 %>%
  group_by(Day,GenotypeSex) %>%
  summarise(Copies_DNA_log_mean=mean(Copies_DNA_log),
            Copies_DNA_log_sd=sd(Copies_DNA_log))

## Male (WT vs KO)
area_2 <- area_1 %>% 
  filter(Sex=="Male")

line_data_2<-line_data_1 %>% 
  filter(GenotypeSex == "WT Male" | GenotypeSex == "KO Male")

# Figure
## Male (WT vs KO)  
figure_2 <- ggplot(data=line_data_2, aes(x=Day,y=Copies_DNA_log_mean,group=GenotypeSex))+
  geom_line(size=1,aes(color=GenotypeSex)) +
  geom_point(size=2,aes(color=GenotypeSex,shape=GenotypeSex))+
  geom_pointrange(aes(ymin=Copies_DNA_log_mean-Copies_DNA_log_sd, ymax=Copies_DNA_log_mean+Copies_DNA_log_sd,color=GenotypeSex)) +
  theme_bw() +
  labs(x="Time (Days post-gavage)",y=expression(atop("B. bifidum abundance",paste (log[10]*"copies/ng faces DNA"),caption = ""))) + 
  theme(legend.position = "bottom", # right,left, bottom
        legend.title = element_text(face="plain", size = 14), 
        legend.text = element_text(face="plain", size = 14),
        axis.text.x=element_text(colour="black", face="plain", size=17), 
        axis.text.y=element_text(colour="black", face="plain", size=17),
        axis.title.x=element_text(margin = margin(t = 5), size=17),
        axis.title.y=element_text(margin = margin(r = 5), size=17),
        axis.title = element_text(face="plain", size = 10),
        plot.title = element_text(size=10, hjust=0.5, face="plain"),
        panel.grid = element_blank(),
        panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
        legend.background = element_rect(color="transparent"),
        aspect.ratio = 1,
        plot.background = element_rect(fill="transparent", colour = "transparent"))+
  scale_x_continuous(limits = c(0,14),breaks = seq(2,14,2))+
  scale_y_continuous(limits = c(0,7),breaks = seq(0,7,1))+
  scale_colour_manual(values=c("#3C5488B2","#7E6148B2")) + 
  geom_hline(yintercept = median(df_shape_1$Copies_DNA_log_dl),linetype="dashed",color=c("#E7A79BFF"))+
  annotate("text",x=7,y=7,
           label = expression(italic("P") > "0.05"),hjust=0.5,size=6)
  
  figure_2 

# Stats
area_WTMale <- area_1 %>% 
  filter(GenotypeSex == "WT Male")

area_KOMale <- area_1 %>% 
  filter(GenotypeSex == "KO Male")

## D'Agostino-Pearson normality test ("omnibus K2" test)
  ##Null (H0): The data are normally distributed Alternate/the data are sampled from a Gaussian distribution
  ##The low p-value (<0.05) indicates you reject that null hypothesis and so accept the alternative hypothesis that the data are not sampled from a Gaussian population
  ## https://www.rdocumentation.org/packages/PoweR/versions/1.0.7/topics/statcompute
  ## in this case, all notmally distributed, therefore one-way anova is chosen
statcompute(6,area_WTMale$`total_area`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 2.747455
## 
## $pvalue
## [1] 0.2531615
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
statcompute(6,area_KOMale$`total_area`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 2.490448
## 
## $pvalue
## [1] 0.2878764
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
set.seed(123)

## Summary
Summary_area <- area_1 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(total_area, na.rm=TRUE),
    sd=sd(total_area, na.rm=TRUE),
    median= median (total_area, na.rm=TRUE),
    IQR = IQR (total_area,na.rm=TRUE),
    IQR25 = quantile(total_area, 0.25),
    IQR75 = quantile(total_area, 0.75)
  )
Summary_area
## # A tibble: 4 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         8  14.0  4.97   13.3  5.53  9.86  15.4
## 2 WT Female       8  14.4  4.73   14.2  7.25 10.5   17.7
## 3 KO Male         8  14.9  3.30   13.8  3.18 13.2   16.4
## 4 KO Female       8  10.2  5.89   12.8  7.68  6.06  13.7
## Anova stats
### Male (WT vs KO): # we hypothesized mean abundance of glycan utilizer B. bifidum in WT is greater than mean abundance of that in KO: one-sided (greater)
stats <- t.test(total_area ~ GenotypeSex, data=area_2,alternative="greater",
                     paired=FALSE, correct=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
stats
## 
##  Welch Two Sample t-test
## 
## data:  total_area by GenotypeSex
## t = -0.42601, df = 12.168, p-value = 0.6612
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -4.656476       Inf
## sample estimates:
## mean in group WT Male mean in group KO Male 
##              13.98936              14.88849
# Effect size
set.seed(123)
effectsize <- area_1 %>% 
  as.data.frame()%>%
  wilcox_effsize(total_area ~ GenotypeSex)
effectsize
## # A tibble: 6 × 7
##   .y.        group1    group2    effsize    n1    n2 magnitude
## * <chr>      <chr>     <chr>       <dbl> <int> <int> <ord>    
## 1 total_area WT Male   WT Female  0.0263     8     8 small    
## 2 total_area WT Male   KO Male    0.210      8     8 small    
## 3 total_area WT Male   KO Female  0.263      8     8 small    
## 4 total_area WT Female KO Male    0.131      8     8 small    
## 5 total_area WT Female KO Female  0.315      8     8 moderate 
## 6 total_area KO Male   KO Female  0.394      8     8 moderate

Abx+breve

df_detection_limit_1<-df_all_model2 %>%
  filter(Group=="ABX_PRO" & Treatment== "Breve") %>% 
  filter(Timepoint == "Last_abx_day") %>%  
  mutate(Copies_DNA_log = log10(Copies_DNA+1)) %>%
  select(Mice_ID,Copies_DNA_log)

df_shape_1 <- df_all_model2 %>% 
  filter(Group=="ABX_PRO" & Treatment== "Breve") %>% 
  filter(Timepoint != "Last_abx_day") %>%
  mutate(Copies_DNA_log = log10(Copies_DNA+1)) %>% 
  mutate(Day = as.numeric(gsub("D","",gsub("_post_gavage","",Timepoint)))) %>%
  separate(GenotypeSex, c("Genotype","Sex"), sep = "_",remove = FALSE) %>% 
  mutate(Genotype=factor(Genotype,levels = c("WT","KO"))) %>% 
  mutate(Sex = factor(Sex, levels = c("Male", "Female"))) %>% 
  mutate(GenotypeSex = as.factor(gsub("_"," ",GenotypeSex))) %>% 
  mutate(GenotypeSex = factor(GenotypeSex,levels = c("WT Male","WT Female","KO Male","KO Female"))) %>% 
  select(Day,Genotype,Sex,GenotypeSex,Mice_ID,Copies_DNA_log) %>%
  inner_join(df_detection_limit_1,by="Mice_ID") %>%
  rename(Copies_DNA_log = Copies_DNA_log.x,
         Copies_DNA_log_dl = Copies_DNA_log.y) %>%
  arrange (Mice_ID, Day) %>%
  group_by (Mice_ID) %>%
  mutate(Day_difference = Day - lag(Day)) %>%
  mutate(l_difference = Copies_DNA_log - Copies_DNA_log_dl) %>%
  mutate(area = (l_difference + lag(l_difference)) * Day_difference/2)

# Area and line data
## All genotype x sex type
area_1 <- df_shape_1 %>% 
  select(Mice_ID,Genotype,Sex,GenotypeSex,area) %>% 
  group_by(Mice_ID,Genotype,Sex, GenotypeSex) %>% 
  summarise(total_area=sum(area,na.rm=T))

line_data_1<-df_shape_1 %>%
  group_by(Day,GenotypeSex) %>%
  summarise(Copies_DNA_log_mean=mean(Copies_DNA_log),
            Copies_DNA_log_sd=sd(Copies_DNA_log))

## Male (WT vs KO)
area_2 <- area_1 %>% 
  filter(Sex=="Male")

line_data_2<-line_data_1 %>% 
  filter(GenotypeSex == "WT Male" | GenotypeSex == "KO Male")

# Figure
## Male (WT vs KO)  
figure_2 <- ggplot(data=line_data_2, aes(x=Day,y=Copies_DNA_log_mean,group=GenotypeSex))+
  geom_line(size=1,aes(color=GenotypeSex)) +
  geom_point(size=2,aes(color=GenotypeSex,shape=GenotypeSex))+
  geom_pointrange(aes(ymin=Copies_DNA_log_mean-Copies_DNA_log_sd, ymax=Copies_DNA_log_mean+Copies_DNA_log_sd,color=GenotypeSex)) +
  theme_bw() +
  labs(x="Time (Days post-gavage)",y=expression(atop("B. breve abundance",paste (log[10]*"copies/ng faces DNA"),caption = ""))) + 
  theme(legend.position = "bottom", # right,left, bottom
        legend.title = element_text(face="plain", size = 14), 
        legend.text = element_text(face="plain", size = 14),
        axis.text.x=element_text(colour="black", face="plain", size=17), 
        axis.text.y=element_text(colour="black", face="plain", size=17),
        axis.title.x=element_text(margin = margin(t = 5), size=17),
        axis.title.y=element_text(margin = margin(r = 5), size=17),
        axis.title = element_text(face="plain", size = 10),
        plot.title = element_text(size=10, hjust=0.5, face="plain"),
        panel.grid = element_blank(),
        panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
        legend.background = element_rect(color="transparent"),
        aspect.ratio = 1,
        plot.background = element_rect(fill="transparent", colour = "transparent"))+
  scale_x_continuous(limits = c(0,14),breaks = seq(2,14,2))+
  scale_y_continuous(limits = c(0,7),breaks = seq(0,7,1))+
  scale_colour_manual(values=c("#3C5488B2","#7E6148B2")) + 
  geom_hline(yintercept = median(df_shape_1$Copies_DNA_log_dl),linetype="dashed",color=c("#E7A79BFF"))+
  annotate("text",x=7,y=7,
           label = expression(italic("P") > "0.05"),hjust=0.5,size=6)

  figure_2 

# Stats
area_WTMale <- area_1 %>% 
  filter(GenotypeSex == "WT Male")

area_KOMale <- area_1 %>% 
  filter(GenotypeSex == "KO Male")

## D'Agostino-Pearson normality test ("omnibus K2" test)
  ##Null (H0): The data are normally distributed Alternate/the data are sampled from a Gaussian distribution
  ##The low p-value (<0.05) indicates you reject that null hypothesis and so accept the alternative hypothesis that the data are not sampled from a Gaussian population
  ## https://www.rdocumentation.org/packages/PoweR/versions/1.0.7/topics/statcompute
  ## in this case, all notmally distributed, therefore one-way anova is chosen
statcompute(6,area_WTMale$`total_area`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 3.317558
## 
## $pvalue
## [1] 0.1903713
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
statcompute(6,area_KOMale$`total_area`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 2.739815
## 
## $pvalue
## [1] 0.2541305
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
set.seed(123)

## Summary
Summary_area <- area_1 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(total_area, na.rm=TRUE),
    sd=sd(total_area, na.rm=TRUE),
    median= median (total_area, na.rm=TRUE),
    IQR = IQR (total_area,na.rm=TRUE),
    IQR25 = quantile(total_area, 0.25),
    IQR75 = quantile(total_area, 0.75)
  )
Summary_area
## # A tibble: 4 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         8  26.4  8.07   26.3  4.74  23.0  27.8
## 2 WT Female       8  30.7  5.43   30.9 10.2   25.9  36.1
## 3 KO Male         8  26.5  7.65   24.8 13.3   20.1  33.4
## 4 KO Female       8  31.9  5.75   30.0  9.32  28.5  37.8
## Anova stats
### Male (WT vs KO): # we hypothesized mean abundance of non-glycan utilizer B. breve in WT is equal to mean abundance of that in KO: two-sided
stats <- t.test(total_area ~ GenotypeSex, data=area_2, alternative="two.sided", 
                     paired=FALSE, correct=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
stats
## 
##  Welch Two Sample t-test
## 
## data:  total_area by GenotypeSex
## t = -0.026727, df = 13.96, p-value = 0.9791
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.539195  8.329054
## sample estimates:
## mean in group WT Male mean in group KO Male 
##              26.44133              26.54640
# Effect size
set.seed(123)
effectsize <- area_1 %>% 
  as.data.frame()%>%
  wilcox_effsize(total_area ~ GenotypeSex)
effectsize
## # A tibble: 6 × 7
##   .y.        group1    group2    effsize    n1    n2 magnitude
## * <chr>      <chr>     <chr>       <dbl> <int> <int> <ord>    
## 1 total_area WT Male   WT Female  0.289      8     8 small    
## 2 total_area WT Male   KO Male    0.0525     8     8 small    
## 3 total_area WT Male   KO Female  0.420      8     8 moderate 
## 4 total_area WT Female KO Male    0.289      8     8 small    
## 5 total_area WT Female KO Female  0.158      8     8 small    
## 6 total_area KO Male   KO Female  0.420      8     8 moderate

Infantis

df_detection_limit_1<-df_all_model2 %>%
  filter(Model=="2" & Period == "Other") %>% 
  filter(Group=="PRO" & Treatment== "Infantis") %>% 
  filter(Timepoint == "Baseline") %>%  
  mutate(Copies_DNA_log = log10(Copies_DNA+1)) %>%
  select(Mice_ID,Copies_DNA_log)

df_shape_1 <- df_all_model2 %>% 
  filter(Model=="2" & Period == "Post_gavage") %>% 
  filter(Group=="PRO" & Treatment== "Infantis") %>% 
  filter(Timepoint != "Baseline") %>%
  mutate(Copies_DNA_log = log10(Copies_DNA+1)) %>% 
  mutate(Day = as.numeric(gsub("D","",gsub("_post_gavage","",Timepoint)))) %>%
  separate(GenotypeSex, c("Genotype","Sex"), sep = "_",remove = FALSE) %>% 
  mutate(Genotype=factor(Genotype,levels = c("WT","KO"))) %>% 
  mutate(Sex = factor(Sex, levels = c("Male", "Female"))) %>% 
  mutate(GenotypeSex = as.factor(gsub("_"," ",GenotypeSex))) %>% 
  mutate(GenotypeSex = factor(GenotypeSex,levels = c("WT Male","WT Female","KO Male","KO Female"))) %>% 
  select(Day,Genotype,Sex,GenotypeSex,Mice_ID,Copies_DNA_log) %>%
  inner_join(df_detection_limit_1,by="Mice_ID") %>%
  rename(Copies_DNA_log = Copies_DNA_log.x,
         Copies_DNA_log_dl = Copies_DNA_log.y) %>%
  arrange (Mice_ID, Day) %>%
  group_by (Mice_ID) %>%
  mutate(Day_difference = Day - lag(Day)) %>%
  mutate(l_difference = Copies_DNA_log - Copies_DNA_log_dl) %>%
  mutate(area = (l_difference + lag(l_difference)) * Day_difference/2)

# Area and line data
## All genotype x sex type
area_1 <- df_shape_1 %>% 
  select(Mice_ID,Genotype,Sex,GenotypeSex,area) %>% 
  group_by(Mice_ID,Genotype,Sex, GenotypeSex) %>% 
  summarise(total_area=sum(area,na.rm=T))

line_data_1<-df_shape_1 %>%
  group_by(Day,GenotypeSex) %>%
  summarise(Copies_DNA_log_mean=mean(Copies_DNA_log),
            Copies_DNA_log_sd=sd(Copies_DNA_log))

## Male (WT vs KO)
area_2 <- area_1 %>% 
  filter(Sex=="Male")

line_data_2<-line_data_1 %>% 
  filter(GenotypeSex == "WT Male" | GenotypeSex == "KO Male")

# Figure
## Male (WT vs KO)  
figure_2 <- ggplot(data=line_data_2, aes(x=Day,y=Copies_DNA_log_mean,group=GenotypeSex))+
  geom_line(size=1,aes(color=GenotypeSex)) +
  geom_point(size=2,aes(color=GenotypeSex,shape=GenotypeSex))+
  geom_pointrange(aes(ymin=Copies_DNA_log_mean-Copies_DNA_log_sd, ymax=Copies_DNA_log_mean+Copies_DNA_log_sd,color=GenotypeSex)) +
  theme_bw() +
  labs(x="Time (Days post-gavage)",y=expression(atop("B. infantis abundance",paste (log[10]*"copies/ng faces DNA"),caption = ""))) + 
  theme(legend.position = "bottom", # right,left, bottom
        legend.title = element_text(face="plain", size = 14), 
        legend.text = element_text(face="plain", size = 14),
        axis.text.x=element_text(colour="black", face="plain", size=14), 
        axis.text.y=element_text(colour="black", face="plain", size=14),
        axis.title.x=element_text(margin = margin(t = 5), size=14),
        axis.title.y=element_text(margin = margin(r = 5), size=14),
        axis.title = element_text(face="plain", size = 10),
        plot.title = element_text(size=10, hjust=0.5, face="plain"),
        panel.grid = element_blank(),
        panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
        legend.background = element_rect(color="transparent"),
        aspect.ratio = 1,
        plot.background = element_rect(fill="transparent", colour = "transparent"))+
  scale_x_continuous(limits = c(0,14),breaks = seq(2,14,2))+
  scale_y_continuous(limits = c(0,8),breaks = seq(0,8,2))+
  scale_colour_manual(values=c("#3C5488B2","#7E6148B2")) + 
  geom_hline(yintercept = median(df_shape_1$Copies_DNA_log_dl),linetype="dashed",color=c("#E7A79BFF"))+
  annotate("text",x=8,y=0.6,
           label="Detection limit: 0.10", hjust = 0, size=4)
  
  figure_2 

# Stats
area_WTMale <- area_1 %>% 
  filter(GenotypeSex == "WT Male")

area_KOMale <- area_1 %>% 
  filter(GenotypeSex == "KO Male")

## D'Agostino-Pearson normality test ("omnibus K2" test)
  ##Null (H0): The data are normally distributed Alternate/the data are sampled from a Gaussian distribution
  ##The low p-value (<0.05) indicates you reject that null hypothesis and so accept the alternative hypothesis that the data are not sampled from a Gaussian population
  ## https://www.rdocumentation.org/packages/PoweR/versions/1.0.7/topics/statcompute
  ## in this case, all notmally distributed, therefore one-way anova is chosen
statcompute(6,area_WTMale$`total_area`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 0.05181577
## 
## $pvalue
## [1] 0.9744248
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
statcompute(6,area_KOMale$`total_area`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 0.1447826
## 
## $pvalue
## [1] 0.9301669
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
set.seed(123)

## Summary
Summary_area <- area_1 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(total_area, na.rm=TRUE),
    sd=sd(total_area, na.rm=TRUE),
    median= median (total_area, na.rm=TRUE),
    IQR = IQR (total_area,na.rm=TRUE),
    IQR25 = quantile(total_area, 0.25),
    IQR75 = quantile(total_area, 0.75)
  )
Summary_area
## # A tibble: 4 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         8  1.43 0.294   1.48 0.326  1.22  1.54
## 2 WT Female       8  3.35 0.458   3.28 0.409  3.17  3.58
## 3 KO Male         8  2.55 0.618   2.53 0.644  2.21  2.85
## 4 KO Female       8  3.20 0.758   3.17 0.928  2.86  3.79
## T test
### Male (WT vs KO): # we hypothesized mean abundance of glycan utilizer B. infantis in WT is equal to mean abundance of that in KO: two-sided because of indigenous microbiome
stats <- t.test(total_area ~ GenotypeSex, data=area_2,alternative="two.sided",
                     paired=FALSE, correct=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
stats
## 
##  Welch Two Sample t-test
## 
## data:  total_area by GenotypeSex
## t = -4.6383, df = 10.027, p-value = 0.0009177
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -1.6606453 -0.5831718
## sample estimates:
## mean in group WT Male mean in group KO Male 
##              1.425155              2.547063
# Effect size
set.seed(123)
effectsize <- area_1 %>% 
  as.data.frame()%>%
  wilcox_effsize(total_area ~ GenotypeSex)
effectsize
## # A tibble: 6 × 7
##   .y.        group1    group2    effsize    n1    n2 magnitude
## * <chr>      <chr>     <chr>       <dbl> <int> <int> <ord>    
## 1 total_area WT Male   WT Female  0.840      8     8 large    
## 2 total_area WT Male   KO Male    0.788      8     8 large    
## 3 total_area WT Male   KO Female  0.840      8     8 large    
## 4 total_area WT Female KO Male    0.578      8     8 large    
## 5 total_area WT Female KO Female  0.0788     8     8 small    
## 6 total_area KO Male   KO Female  0.473      8     8 moderate
# Model 2
df_detection_limit_1<-df_all_model2 %>%
  filter(Model=="2" & Period == "Other") %>% 
  filter(Group=="PRO" & Treatment== "Infantis") %>% 
  filter(Timepoint == "Baseline") %>%  
  mutate(Copies_DNA_log = log10(Copies_DNA+1)) %>%
  select(Mice_ID,Copies_DNA_log)

df_shape_1 <- df_all_model2 %>% 
  filter(Model=="2" ) %>% 
  filter(Group=="PRO" & Treatment== "Infantis") %>% 
  mutate(Copies_DNA_log = log10(Copies_DNA+1)) %>% 
  mutate(Day = as.numeric(gsub("D","",Date))) %>%
  separate(GenotypeSex, c("Genotype","Sex"), sep = "_",remove = FALSE) %>% 
  mutate(Genotype=factor(Genotype,levels = c("WT","KO"))) %>% 
  mutate(Sex = factor(Sex, levels = c("Male", "Female"))) %>% 
  mutate(GenotypeSex = as.factor(gsub("_"," ",GenotypeSex))) %>% 
  mutate(GenotypeSex = factor(GenotypeSex,levels = c("WT Male","WT Female","KO Male","KO Female"))) %>% 
  filter(GenotypeSex == "WT Male" | GenotypeSex == "KO Male") %>% 
  select(Day,Genotype,Sex,GenotypeSex,Mice_ID,Copies_DNA_log) %>%
  inner_join(df_detection_limit_1,by="Mice_ID") %>%
  rename(Copies_DNA_log = Copies_DNA_log.x,
         Copies_DNA_log_dl = Copies_DNA_log.y) %>%
  arrange (Mice_ID, Day) %>%
  group_by (Mice_ID) %>%
  mutate(Day_difference = Day - lag(Day)) %>%
  mutate(l_difference = Copies_DNA_log - Copies_DNA_log_dl) %>%
  mutate(area = (l_difference + lag(l_difference)) * Day_difference/2)

# Model 1: confirmatory results during gavage period
df_detection_limit_2<-df_all_model2 %>%
  filter(Model=="1") %>% 
  filter(Group=="PRO" & Treatment== "Infantis") %>% 
  filter(Timepoint == "Baseline") %>%  
  mutate(Copies_DNA_log = log10(Copies_DNA+1)) %>%
  select(Mice_ID,Copies_DNA_log)

df_shape_2 <- df_all_model2 %>% 
  filter(Model=="1" ) %>% 
  filter(Group=="PRO" & Treatment== "Infantis") %>% 
  mutate(Copies_DNA_log = log10(Copies_DNA+1)) %>% 
  mutate(Day = as.numeric(gsub("D","",gsub("A","",gsub("B",".5",Date))))) %>%
  separate(GenotypeSex, c("Genotype","Sex"), sep = "_",remove = FALSE) %>% 
  mutate(Genotype=factor(Genotype,levels = c("WT","KO"))) %>% 
  mutate(Sex = factor(Sex, levels = c("Male", "Female"))) %>% 
  mutate(GenotypeSex = as.factor(gsub("_"," ",GenotypeSex))) %>% 
  mutate(GenotypeSex = factor(GenotypeSex,levels = c("WT Male","WT Female","KO Male","KO Female"))) %>% 
  filter(GenotypeSex == "WT Male" | GenotypeSex == "KO Male") %>% 
  select(Day,Genotype,Sex,GenotypeSex,Mice_ID,Copies_DNA_log) %>%
  inner_join(df_detection_limit_2,by="Mice_ID") %>%
  rename(Copies_DNA_log = Copies_DNA_log.x,
         Copies_DNA_log_dl = Copies_DNA_log.y) %>%
  arrange (Mice_ID, Day) %>%
  group_by (Mice_ID) %>%
  mutate(Day_difference = Day - lag(Day)) %>%
  mutate(l_difference = Copies_DNA_log - Copies_DNA_log_dl) %>%
  mutate(area = (l_difference + lag(l_difference)) * Day_difference/2)


# Area and line data
## post-gavage area
area_1 <- df_shape_1 %>% 
  filter(Day %in% c("6", "7", "8","10","12")) %>% 
  select(Mice_ID,Genotype,Sex,GenotypeSex,area) %>% 
  group_by(Mice_ID,Genotype,Sex, GenotypeSex) %>% 
  summarise(total_area=sum(area,na.rm=T))

line_data_1<-df_shape_1 %>%
  filter(Day %in% c("6", "7", "8","10","12")) %>% 
  group_by(Day,GenotypeSex) %>%
  summarise(Copies_DNA_log_mean=mean(Copies_DNA_log),
            Copies_DNA_log_sd=sd(Copies_DNA_log))

## during gavage + post-gavage area
area_2 <- df_shape_1 %>% 
  select(Mice_ID,Genotype,Sex,GenotypeSex,area) %>% 
  group_by(Mice_ID,Genotype,Sex, GenotypeSex) %>% 
  summarise(total_area=sum(area,na.rm=T))

line_data_2<-df_shape_1 %>%
  group_by(Day,GenotypeSex) %>%
  summarise(Copies_DNA_log_mean=mean(Copies_DNA_log),
            Copies_DNA_log_sd=sd(Copies_DNA_log))

## Model1: during gavage
area_3 <- df_shape_2 %>% 
  select(Mice_ID,Genotype,Sex,GenotypeSex,area) %>% 
  group_by(Mice_ID,Genotype,Sex, GenotypeSex) %>% 
  summarise(total_area=sum(area,na.rm=T))

line_data_3<-df_shape_2 %>%
  group_by(Day,GenotypeSex) %>%
  summarise(Copies_DNA_log_mean=mean(Copies_DNA_log),
            Copies_DNA_log_sd=sd(Copies_DNA_log)) %>% 
  filter(Day != "0")


# Stats
area_1_WTMale <- area_1 %>% 
  filter(GenotypeSex == "WT Male")

area_1_KOMale <- area_1 %>% 
  filter(GenotypeSex == "KO Male")

area_2_WTMale <- area_2 %>% 
  filter(GenotypeSex == "WT Male")

area_2_KOMale <- area_2 %>% 
  filter(GenotypeSex == "KO Male")

## D'Agostino-Pearson normality test ("omnibus K2" test) - Parametric data here
  ##Null (H0): The data are normally distributed Alternate/the data are sampled from a Gaussian distribution
  ##The low p-value (<0.05) indicates you reject that null hypothesis and so accept the alternative hypothesis that the data are not sampled from a Gaussian population
  ## https://www.rdocumentation.org/packages/PoweR/versions/1.0.7/topics/statcompute
  ## in this case, all notmally distributed, therefore one-way anova is chosen
statcompute(6,area_1_WTMale$`total_area`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 0.3792575
## 
## $pvalue
## [1] 0.8272662
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
statcompute(6,area_1_KOMale$`total_area`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 0.4473146
## 
## $pvalue
## [1] 0.7995891
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
statcompute(6,area_2_WTMale$`total_area`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 1.199871
## 
## $pvalue
## [1] 0.5488472
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
statcompute(6,area_2_KOMale$`total_area`, levels = c(0.05), critvalL = NULL, critvalR = NULL,
            alter = 0, stat.pars = NULL)
## $statistic
## [1] 1.300677
## 
## $pvalue
## [1] 0.521869
## 
## $decision
## [1] 0
## 
## $alter
## [1] 3
## 
## $stat.pars
## [1] NA
## 
## $symbol
## [1] "K^2"
## Summary
Summary_1_area <- area_1 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(total_area, na.rm=TRUE),
    sd=sd(total_area, na.rm=TRUE),
    median= median (total_area, na.rm=TRUE),
    IQR = IQR (total_area,na.rm=TRUE),
    IQR25 = quantile(total_area, 0.25),
    IQR75 = quantile(total_area, 0.75)
  )
Summary_1_area
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         8  3.78 0.562   3.70 0.594  3.45  4.05
## 2 KO Male         8  5.58 0.898   5.41 1.23   5.04  6.27
Summary_2_area <- area_2 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(total_area, na.rm=TRUE),
    sd=sd(total_area, na.rm=TRUE),
    median= median (total_area, na.rm=TRUE),
    IQR = IQR (total_area,na.rm=TRUE),
    IQR25 = quantile(total_area, 0.25),
    IQR75 = quantile(total_area, 0.75)
  )
Summary_2_area
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         8  13.4  1.52   14.0  2.28  12.2  14.5
## 2 KO Male         8  17.0  1.80   17.3  1.85  16.1  17.9
## T test
### Male (WT vs KO): # we hypothesized mean abundance of glycan utilizer B. infantis in WT is equal to mean abundance of that in KO: two-sided because of indigenous microbiome
stats_1 <- t.test(total_area ~ GenotypeSex, data=area_1,alternative="two.sided",
                     paired=FALSE, correct=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
stats_1
## 
##  Welch Two Sample t-test
## 
## data:  total_area by GenotypeSex
## t = -4.8032, df = 11.752, p-value = 0.0004571
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.6165149 -0.9808706
## sample estimates:
## mean in group WT Male mean in group KO Male 
##              3.777180              5.575873
stats_2 <- t.test(total_area ~ GenotypeSex, data=area_2,alternative="two.sided",
                     paired=FALSE, correct=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
stats_2
## 
##  Welch Two Sample t-test
## 
## data:  total_area by GenotypeSex
## t = -4.2425, df = 13.636, p-value = 0.0008672
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -5.325218 -1.742926
## sample estimates:
## mean in group WT Male mean in group KO Male 
##              13.41924              16.95331
# Effect size
set.seed(123)

effectsize_1 <- area_1 %>% 
  as.data.frame()%>%
  mutate(GenotypeSex=factor(GenotypeSex)) %>% 
  wilcox_effsize(total_area ~ GenotypeSex)
effectsize_1
## # A tibble: 1 × 7
##   .y.        group1  group2  effsize    n1    n2 magnitude
## * <chr>      <chr>   <chr>     <dbl> <int> <int> <ord>    
## 1 total_area WT Male KO Male   0.788     8     8 large
effectsize_2 <- area_2 %>% 
  as.data.frame()%>%
  mutate(GenotypeSex=factor(GenotypeSex)) %>%
  wilcox_effsize(total_area ~ GenotypeSex)
effectsize_2
## # A tibble: 1 × 7
##   .y.        group1  group2  effsize    n1    n2 magnitude
## * <chr>      <chr>   <chr>     <dbl> <int> <int> <ord>    
## 1 total_area WT Male KO Male   0.709     8     8 large
# min.line
min.line<-line_data_2%>%
  select(-Copies_DNA_log_sd)%>%
  spread(GenotypeSex,Copies_DNA_log_mean)%>%
  group_by(Day)%>%
  mutate(min_line=min(`WT Male`,`KO Male`))%>%
  select(Day,min_line)

line_data_2_min<-line_data_2 %>%
  left_join(min.line,by=c("Day"="Day")) %>% 
  filter(Day %in% c(6,7,8,10,12))

line_ribbon <- line_data_2_min %>% 
  select (Day, GenotypeSex, Copies_DNA_log_mean) %>% 
  spread(GenotypeSex, Copies_DNA_log_mean)

# Figure
figure <- ggplot()+
  geom_line(size=0.5,aes(x=Day,y=Copies_DNA_log_mean,group=GenotypeSex, color=GenotypeSex), data = line_data_2, alpha =0.5) +
  geom_line(size=0.25,aes(x=Day,y=Copies_DNA_log_mean,group=GenotypeSex, color=GenotypeSex), data = line_data_3, linetype = "dashed", alpha=0.5) +
  geom_pointrange(
    size =0.7, 
    shape =16,
    mapping = aes(x=Day,y=Copies_DNA_log_mean,ymin=Copies_DNA_log_mean-Copies_DNA_log_sd, ymax=Copies_DNA_log_mean+Copies_DNA_log_sd,color=GenotypeSex), 
    data = line_data_2,
    width = 0)+
  geom_pointrange(
    size =0.35, 
    shape =16,
    mapping = aes(x=Day,y=Copies_DNA_log_mean,ymin=Copies_DNA_log_mean-Copies_DNA_log_sd, ymax=Copies_DNA_log_mean+Copies_DNA_log_sd,color=GenotypeSex), 
    data = line_data_3,
    alpha = 0.5,
    width = 0)+  
  theme_bw() +
  labs(x="Time (Days)",y=expression(atop("B. infantis abundance",paste (log[10]*"copies/ng faces DNA"),caption = ""))) +
  theme(legend.position = "bottom", # right,left, bottom
        legend.title = element_text(face="plain", size = 12), 
        legend.text = element_text(face="plain", size = 12),
        axis.text.x=element_text(colour="black", face="plain", size=15), 
        axis.text.y=element_text(colour="black", face="plain", size=15),
        axis.title.x=element_text(margin = margin(t = 5), size=15),
        axis.title.y=element_text(margin = margin(r = 5), size=15),
        axis.title = element_text(face="plain", size = 10),
        plot.title = element_text(size=10, hjust=0.5, face="plain"),
        panel.grid = element_blank(),
        panel.border = element_rect(color="black", #element_rect is often used for backgrounds and borders
                                    fill = NA,
                                    size = 1),
        legend.background = element_rect(color="transparent"),
        aspect.ratio = 0.8,
        plot.background = element_rect(fill="transparent", colour = "transparent"))+
  scale_x_continuous(limits = c(0,12),breaks = seq(0,12,1))+
  scale_y_continuous(limits = c(0,6),breaks = seq(0,6,1))+
  scale_colour_manual(values=c("#3C5488B2","#7E6148B2")) + 
  geom_vline(xintercept = 6, linetype ="dashed", color ="black", alpha = 0.3)+
  geom_hline(yintercept = median(df_shape_1$Copies_DNA_log_dl),linetype="dashed",color=c("#E7A79BFF"))+
  geom_ribbon(data = line_ribbon, aes(x=Day,ymax=`WT Male`, ymin=`KO Male`),fill="#cfcfcf",alpha = 0.2)+ 
  annotate("text",x=9.5,y=6,
           label = expression(italic("P") == "0.00046"),hjust=0.5,size=5)


figure

# Model 1
## Shape data
df_shape_stats_2 <- df_shape_2%>% 
  select(Day, GenotypeSex,Copies_DNA_log) %>% 
  filter(GenotypeSex == "KO Male") %>%
  rename(KO_Male = Copies_DNA_log ) %>%
  cbind(df_shape_2 %>% select (Day,GenotypeSex,Copies_DNA_log) %>% filter(GenotypeSex == "WT Male") %>% rename (WT_Male = Copies_DNA_log)) %>%
  select(Day=2,KO_Male,WT_Male) %>%
  arrange(Day)

df_shape_2 <- df_shape_2%>% 
  select(Day, GenotypeSex,Copies_DNA_log)

## Man-whitney test
### Day1.5
df_shape_stats_2_d1.5 <- df_shape_2 %>% 
  filter(Day == 1.5)

Summary_model1_1.5 <- df_shape_stats_2_d1.5 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

d1.5 <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_stats_2_d1.5,
                   exact = FALSE)

### Day2
df_shape_stats_2_d2 <- df_shape_2 %>% 
  filter(Day == 2)

Summary_model1_2 <- df_shape_stats_2_d2 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

d2 <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_stats_2_d2,
                   exact = FALSE)

### Day2.5
df_shape_stats_2_d2.5 <- df_shape_2 %>% 
  filter(Day == 2.5)

Summary_model1_2.5 <- df_shape_stats_2_d2.5 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

d2.5 <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_stats_2_d2.5,
                   exact = FALSE)

### Day3
df_shape_stats_2_d3 <- df_shape_2 %>% 
  filter(Day == 3)

Summary_model1_3 <- df_shape_stats_2_d3 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

d3 <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_stats_2_d3,
                   exact = FALSE)

### Day3.5
df_shape_stats_2_d3.5 <- df_shape_2 %>% 
  filter(Day == 3.5)

Summary_model1_3.5 <- df_shape_stats_2_d3.5 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

d3.5 <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_stats_2_d3.5,
                   exact = FALSE)

### Day4
df_shape_stats_2_d4 <- df_shape_2 %>% 
  filter(Day == 4)

Summary_model1_4 <- df_shape_stats_2_d4 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

d4 <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_stats_2_d4,
                   exact = FALSE)

### Day4.5
df_shape_stats_2_d4.5 <- df_shape_2 %>% 
  filter(Day == 4.5)

Summary_model1_4.5 <- df_shape_stats_2_d4.5 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

d4.5 <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_stats_2_d4.5,
                   exact = FALSE)

### Day5
df_shape_stats_2_d5 <- df_shape_2 %>% 
  filter(Day == 5)

Summary_model1_5 <- df_shape_stats_2_d5 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

d5 <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_stats_2_d5,
                   exact = FALSE)


### Day5.5
df_shape_stats_2_d5.5 <- df_shape_2 %>% 
  filter(Day == 5.5)

Summary_model1_5.5 <- df_shape_stats_2_d5.5 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

d5.5 <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_stats_2_d5.5,
                   exact = FALSE)

## Stats outcomes
### 6 hr post-gavage
d1.5
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 14, p-value = 0.8345
## alternative hypothesis: true location shift is not equal to 0
d2.5
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 15, p-value = 0.6761
## alternative hypothesis: true location shift is not equal to 0
d3.5
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 15, p-value = 0.6761
## alternative hypothesis: true location shift is not equal to 0
d4.5
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 14, p-value = 0.8345
## alternative hypothesis: true location shift is not equal to 0
d5.5
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 23, p-value = 0.03671
## alternative hypothesis: true location shift is not equal to 0
Summary_model1_1.5
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         5  5.20 0.196   5.32 0.304  5.02  5.33
## 2 KO Male         5  5.20 0.230   5.23 0.229  5.03  5.26
Summary_model1_2.5
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         5  5.25 0.229   5.21 0.226  5.12  5.35
## 2 KO Male         5  5.14 0.262   5.25 0.431  4.87  5.30
Summary_model1_3.5
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         5  5.16 0.250   5.10 0.394  4.97  5.36
## 2 KO Male         5  4.95 0.634   5.01 0.537  4.76  5.30
Summary_model1_4.5
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median    IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>
## 1 WT Male         5  4.83 0.430   5.01 0.625   4.56  5.18
## 2 KO Male         5  4.84 0.139   4.81 0.0964  4.77  4.87
Summary_model1_5.5
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median    IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>
## 1 WT Male         5  5.39 0.143   5.38 0.0598  5.36  5.42
## 2 KO Male         5  5.04 0.200   4.96 0.297   4.90  5.20
### 22 hr post-gavage
d2
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 3, p-value = 0.0601
## alternative hypothesis: true location shift is not equal to 0
d3
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 11, p-value = 0.8345
## alternative hypothesis: true location shift is not equal to 0
d4
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 12, p-value = 1
## alternative hypothesis: true location shift is not equal to 0
d5
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 9, p-value = 0.5309
## alternative hypothesis: true location shift is not equal to 0
Summary_model1_2
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         5  1.99 0.624   1.93 0.611  1.53  2.15
## 2 KO Male         5  2.73 0.443   2.53 0.720  2.48  3.20
Summary_model1_3
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         5  2.43 0.696   2.42 1.16   1.89  3.05
## 2 KO Male         5  2.65 0.377   2.73 0.452  2.35  2.80
Summary_model1_4
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         5  2.11 0.930   2.42 1.08   1.69  2.77
## 2 KO Male         5  2.37 0.485   2.58 0.229  2.42  2.65
Summary_model1_5
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         5  2.68 0.505   2.55 0.625  2.38  3.01
## 2 KO Male         5  2.88 0.350   3.09 0.302  2.80  3.10
# Model 2
df_shape_1 <- df_shape_1%>% 
  select(Day, GenotypeSex,Copies_DNA_log)

## D2
df_shape_stats_1_d2 <- df_shape_1 %>% 
  filter(Day == 2)

Summary_model2_2 <- df_shape_stats_1_d2 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

d2 <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_stats_1_d2,
                   exact = FALSE)

## D3
df_shape_stats_1_d3 <- df_shape_1 %>% 
  filter(Day == 2)

Summary_model2_3 <- df_shape_stats_1_d3 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

d3 <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_stats_1_d3,
                   exact = FALSE)


##D4
df_shape_stats_1_d4 <- df_shape_1 %>% 
  filter(Day == 4)

Summary_model2_4 <- df_shape_stats_1_d4 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

d4 <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_stats_1_d4,
                   exact = FALSE)

##D5
df_shape_stats_1_d5 <- df_shape_1 %>% 
  filter(Day == 5)

Summary_model2_5 <- df_shape_stats_1_d5 %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

d5 <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_stats_1_d5,
                   exact = FALSE)
## Outcome
d2
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 15, p-value = 0.08312
## alternative hypothesis: true location shift is not equal to 0
d3
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 15, p-value = 0.08312
## alternative hypothesis: true location shift is not equal to 0
d4
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 14, p-value = 0.06608
## alternative hypothesis: true location shift is not equal to 0
d5
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 7, p-value = 0.01008
## alternative hypothesis: true location shift is not equal to 0
Summary_model2_2
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         8  2.31 0.393   2.28 0.414  2.07  2.48
## 2 KO Male         8  2.68 0.262   2.66 0.168  2.63  2.79
Summary_model2_3
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         8  2.31 0.393   2.28 0.414  2.07  2.48
## 2 KO Male         8  2.68 0.262   2.66 0.168  2.63  2.79
Summary_model2_4
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         8  2.72 0.375   2.71 0.346  2.52  2.87
## 2 KO Male         8  3.15 0.355   3.15 0.300  3.01  3.31
Summary_model2_5
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male         8  2.35 0.395   2.23 0.374  2.08  2.46
## 2 KO Male         8  2.96 0.379   2.87 0.642  2.74  3.38
# Model 1: 
## Combine all 6 hr post-gavage point
df_shape_2_6hr <- df_shape_2%>% 
  filter(Day != 0) %>%
  filter(Day %in% c(1.5,2.5,3.5,4.5,5.5)) %>% 
  select(Day, GenotypeSex,Copies_DNA_log)

Summary_model1_6hr <- df_shape_2_6hr %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

stats_model1_6hr <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_2_6hr,
                   exact = FALSE)

## Combine all 22 hr post-gavage point
df_shape_2_22hr <- df_shape_2%>% 
  filter(Day != 0) %>%
  filter(Day %in% c(2,3,4,5)) %>% 
  select(Day, GenotypeSex,Copies_DNA_log)

## Outcome
Summary_model1_6hr
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male        25  5.17 0.308   5.20 0.357  5.01  5.36
## 2 KO Male        25  5.03 0.339   5.01 0.402  4.86  5.26
stats_model1_6hr 
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 411, p-value = 0.05724
## alternative hypothesis: true location shift is not equal to 0
## Figure
level_order_genotypesex <- c("WT Male", "KO Male")
figure_model1_6hr <- ggplot(data=df_shape_2_6hr, mapping = aes(y=Copies_DNA_log, x=factor(GenotypeSex,level=level_order_genotypesex)))+
  geom_boxplot(outlier.shape = NA)+
  theme_bw()+
  geom_jitter(aes(colour=GenotypeSex),
              size=2,alpha=0.8,
              position=position_jitterdodge(jitter.width = 1.8,
                                            jitter.height = 0, 
                                            dodge.width = 0.8))+
  labs(x="",y=expression(atop(italic("B. infantis"),paste('(log'['10']*'copies/g tissue)') ,caption = "")))+
  theme(legend.position="none",
        axis.text.x=element_text(colour="black", face="plain", size=14), 
        axis.text.y=element_text(colour="black", face="plain", size=14),
        axis.title.x=element_text(margin = margin(t = 5), size=14),
        axis.title.y=element_text(margin = margin(r = 5), size=14),
        axis.title = element_text(face="plain", size = 10),
        plot.title = element_text(size=10, hjust = 0.5),
        panel.grid = element_blank(),
        panel.border =  element_rect(color = "black",
                                    fill = NA,
                                    size = 1),
        aspect.ratio = 1.5,
        plot.background = element_rect(fill = "transparent", colour = "transparent"))+
  scale_y_continuous(limits = c(0,8),breaks = seq(0,8,2))+
  scale_colour_manual(values=c("#3C5488B2","#7E6148B2"))+
  scale_fill_manual(values=c("#3C5488B2","#7E6148B2"))+
  annotate("text",x=1.5,y=8,
           label = expression(italic("P") > "0.05"),hjust=0.5,size=5)

figure_model1_6hr

# Model 2
## Combine all 22 hr post-gavage point
df_shape_1_22hr <- df_shape_1%>% 
  filter(Day != 0) %>%
  filter(Day %in% c(2,3,4,5)) %>% 
  select(Day, GenotypeSex,Copies_DNA_log)

Summary_model2_22hr <- df_shape_1_22hr %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )

## Outcomes
Summary_model2_22hr
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male        32  2.45 0.421   2.39 0.658  2.13  2.79
## 2 KO Male        32  2.91 0.351   2.88 0.476  2.67  3.14
# Model 1 + 2
## Combine all 22 hr post-gavage point
df_shape_1and2_22hr <- full_join(df_shape_1_22hr,df_shape_2_22hr)

Summary_model_1and2_22hr <- df_shape_1and2_22hr %>% 
  group_by(GenotypeSex) %>% 
  summarise(
    count = n(),
    mean=mean(Copies_DNA_log, na.rm=TRUE),
    sd=sd(Copies_DNA_log, na.rm=TRUE),
    median= median (Copies_DNA_log, na.rm=TRUE),
    IQR = IQR (Copies_DNA_log,na.rm=TRUE),
    IQR25 = quantile(Copies_DNA_log, 0.25),
    IQR75 = quantile(Copies_DNA_log, 0.75)
  )


stats_model_1and2_22hr <- wilcox.test(Copies_DNA_log ~ GenotypeSex, data = df_shape_1and2_22hr,
                   exact = FALSE)
## Outcomes
Summary_model_1and2_22hr
## # A tibble: 2 × 8
##   GenotypeSex count  mean    sd median   IQR IQR25 IQR75
##   <fct>       <int> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>
## 1 WT Male        52  2.39 0.546   2.40 0.720  2.09  2.82
## 2 KO Male        52  2.81 0.397   2.80 0.524  2.58  3.10
stats_model_1and2_22hr
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  Copies_DNA_log by GenotypeSex
## W = 714, p-value = 3.405e-05
## alternative hypothesis: true location shift is not equal to 0
## Figure
level_order_genotypesex <- c("WT Male", "KO Male")
figure_model_1and2_22hr <- ggplot(data=df_shape_1and2_22hr, mapping = aes(y=Copies_DNA_log, x=factor(GenotypeSex,level=level_order_genotypesex)))+
  geom_boxplot(outlier.shape = NA)+
  theme_bw()+
  geom_jitter(aes(colour=GenotypeSex),
              size=2,alpha=0.8,
              position=position_jitterdodge(jitter.width = 1.8,
                                            jitter.height = 0, 
                                            dodge.width = 0.8))+
  labs(x="",y=expression(atop(italic("B. infantis"),paste('(log'['10']*'copies/g tissue)'),caption = "")))+
  theme(legend.position="none",
        axis.text.x=element_text(colour="black", face="plain", size=14), 
        axis.text.y=element_text(colour="black", face="plain", size=14),
        axis.title.x=element_text(margin = margin(t = 5), size=14),
        axis.title.y=element_text(margin = margin(r = 5), size=14),
        axis.title = element_text(face="plain", size = 10),
        plot.title = element_text(size=10, hjust = 0.5),
        panel.grid = element_blank(),
        panel.border =  element_rect(color = "black",
                                    fill = NA,
                                    size = 1),
        aspect.ratio = 1.5,
        plot.background = element_rect(fill = "transparent", colour = "transparent"))+
  scale_y_continuous(limits = c(0,8),breaks = seq(0,8,2))+
  scale_colour_manual(values=c("#3C5488B2","#7E6148B2"))+
  scale_fill_manual(values=c("#3C5488B2","#7E6148B2"))+
  annotate("text",x=1.5,y=8,
           label = expression(italic("P") == "0.000034"),hjust=0.5,size=5)

figure_model_1and2_22hr